
# Species table; later sections append `lemisName` and `citesName` columns to
# it. Text columns are kept as character rather than factor.
amphiData <- read.csv("./Data/Amphibians_in_trade.csv",
                      stringsAsFactors = FALSE)

# Temporal online data ----------------------------------------------------

library(lubridate)
library(dplyr)
library(stringr)
library(ggpubr)
library(scico)

# Locate every keyword-extraction file anywhere below the temporal data folder.
extractFilesTemp <- list.files(path = "./Data/TemporalData",
                               pattern = "KEYWORD_EXTRACT",
                               full.names = TRUE, recursive = TRUE)

# Read each file and stack the results. A file that parses to a single column
# contributes NULL, which rbind() skips.
extractDataTemp <- do.call(rbind, lapply(extractFilesTemp, function(path){
  parsed <- read.csv(path, stringsAsFactors = FALSE)
  if(ncol(parsed) > 1) parsed else NULL
}))

wayData <- read.csv("./Data/TemporalData/wayback_terraristik_results.csv",
                    stringsAsFactors = FALSE)

# A page is identified by its row position in the wayback results table.
pageDates <- wayData %>%
  mutate(page = seq_len(n())) %>%
  select(timestamp.parse, page)

# Attach the capture timestamp to each extracted row (joined on whichever
# columns the two tables share), then parse it and derive the calendar year.
temporalDataOnline <- extractDataTemp %>%
  inner_join(pageDates) %>%
  mutate(timestamp.parse = as.POSIXct(timestamp.parse),
         year = year(timestamp.parse))

write.csv(temporalDataOnline,
          file = "./Data/Temporal Online Data.csv",
          row.names = FALSE)

# Number of distinct `sp` values seen online in each year.
onlinePlotData <- temporalDataOnline %>%
  group_by(year) %>%
  summarise(nSpecies = n_distinct(sp)) %>%
  mutate(source = "Online")

# LEMIS data convert names ------------------------------------------------

lemisData <- read.csv(file = "./Data/LEMISData/LEMIS_amphibian_data.csv",
                      stringsAsFactors = FALSE)

# Build a sentence-case "Genus species" name per record, then drop records
# whose name ends in "sp." and records where both parts are missing ("NA NA").
lemisData <- lemisData %>% 
  mutate(lemisName = str_to_sentence(paste(genus, species))) %>% 
  filter(!str_detect(lemisName, "sp\\.$") & !lemisName == "NA NA")

# For every row of amphiData, look for a LEMIS name equal to its amphiName or
# its synonymies entry and keep the first one found; FALSE marks "no match".
# The lookup runs against the de-duplicated LEMIS names: unique() keeps
# first-occurrence order, so the first hit is unchanged while far fewer
# comparisons are made.
lemisNames <- unique(lemisData$lemisName)
amphiData$lemisName <- apply(amphiData, 1, function(x){
  nms <- c(x["amphiName"], x["synonymies"])
  nms <- nms[!nms == ""]
  hits <- lemisNames[lemisNames %in% nms]
  # the condition is a single value, so plain if/else replaces ifelse()
  if(length(hits) > 0) hits[1] else FALSE
})

# Translate each LEMIS record back to its amphiName. match() returns the first
# matching row of amphiData (NA when there is none), which is what the previous
# record-by-record scan produced, without re-scanning amphiData for every
# record.
lemisData$amphiName <- amphiData$amphiName[match(lemisData$lemisName,
                                                 amphiData$lemisName)]

write.csv(x = lemisData,
          file = "./Data/LEMIS Data AmphiNames.csv", row.names = FALSE)

# Distinct matched species per shipment year. Records whose name could not be
# matched carry an NA amphiName; NA is excluded from the tally so it is not
# counted as one extra species (the CITES counts drop NA in the same way).
# Years with only unmatched records are kept, with a count of zero.
lemisPlotData <- lemisData %>% 
  select(amphiName, "year" = shipment_year) %>% 
  group_by(year) %>% 
  summarise(nSpecies = length(unique(amphiName[!is.na(amphiName)]))) %>% 
  mutate(source = "LEMIS")

# CITES trade db name convert ---------------------------------------------

citesData <- read.csv(file = "./Data/CITESData/gross_imports_2020-09-20 15_25_comma_separated.csv",
                      stringsAsFactors = FALSE)

# Drop records whose Taxon ends in "spp." (rows with a missing Taxon are
# removed by the same filter).
citesData <- citesData %>% 
  filter(!str_detect(Taxon, "spp\\.$"))

# For every row of amphiData, look for a CITES Taxon equal to its amphiName or
# its synonymies entry and keep the first one found; FALSE marks "no match".
# unique() keeps first-occurrence order, so the first hit is unchanged.
citesTaxa <- unique(citesData$Taxon)
amphiData$citesName <- apply(amphiData, 1, function(x){
  nms <- c(x["amphiName"], x["synonymies"])
  nms <- nms[!nms == ""]
  hits <- citesTaxa[citesTaxa %in% nms]
  # the condition is a single value, so plain if/else replaces ifelse()
  if(length(hits) > 0) hits[1] else FALSE
})

# Translate each CITES record back to its amphiName; match() gives the first
# matching row of amphiData, or NA when the Taxon was not matched.
citesData$amphiName <- amphiData$amphiName[match(citesData$Taxon,
                                                 amphiData$citesName)]

names(citesData)

citesData <- citesData %>% 
  select(-App., -Taxon, -Term, -Unit, -Country)

# Overwrite every non-missing cell with that row's amphiName, leaving missing
# cells as NA, so each remaining column lists the names recorded in it. Done
# column by column rather than row by row: the result is the same, it avoids
# one data-frame assignment per record, and it is safe for a table with zero
# rows (where 1:nrow() would iterate over c(1, 0)).
citesData[] <- lapply(citesData, function(cellValues){
  replace(citesData$amphiName, is.na(cellValues), NA)
})
  
# One count per column of citesData: how many distinct non-missing names it
# holds. The column names become the row names of the resulting data frame.
citesPlotData <- as.data.frame(vapply(citesData, function(colValues){
  length(unique(colValues[!is.na(colValues)]))
}, FUN.VALUE = integer(1)))
names(citesPlotData) <- "nSpecies"

# Year columns are named "X<year>"; any row whose name is not numeric after
# stripping the "X" (such as the amphiName column) becomes NA and is removed.
citesPlotData$year <- as.numeric(sub("X", "", row.names(citesPlotData)))
citesPlotData <- citesPlotData[!is.na(citesPlotData$year), ]
citesPlotData$source <- "CITES"


# Combine data and plot ---------------------------------------------------

library(ggplot2)

# Stack the three per-year species counts into one long table.
tempPlotData <- rbind(onlinePlotData, lemisPlotData, citesPlotData)

# Raw counts plot ---------------------------------------------------------

# One line per source showing the number of species per year (years < 2020).
rawCountsPlot <- ggplot(data = filter(tempPlotData, year < 2020)) +
  geom_line(aes(x = year, y = nSpecies, colour = source), size = 1.2) +
  # series labels, placed just below each source's value for 2000
  geom_text(aes(x = year, y = nSpecies, colour = source, label = source),
            data = tempPlotData %>%
              group_by(source) %>%
              filter(year == 2000),
            hjust = 0, vjust = 1, nudge_x = 0.1, nudge_y = -5,
            fontface = 2) +
  # the "Online" label is placed at its 2004 value instead
  geom_text(aes(x = year, y = nSpecies, colour = source, label = source),
            data = tempPlotData %>%
              group_by(source) %>%
              filter(year == 2004 & source == "Online"),
            hjust = 0.5, vjust = 1, nudge_x = 0.1, nudge_y = -5,
            fontface = 2) +
  scale_colour_scico_d(palette = "roma") +
  # two-digit year labels: the first two characters become an apostrophe
  scale_x_continuous(breaks = seq(2000, 2020, 1),
                     labels = sub("^..", "'", seq(2000, 2020, 1)),
                     minor_breaks = NULL,
                     expand = expansion(c(0,0), c(1,1))) +
  coord_cartesian(xlim = c(2000, 2019),
                  ylim = c(0, 350),
                  clip = "on") +
  labs(x = "Year", y = "# of\nspecies") +
  theme_bw() +
  theme(legend.position = "none",
        axis.line = element_line(),
        axis.title = element_text(face = 2),
        axis.title.y = element_text(angle = 0, hjust = 1),
        panel.border = element_blank(),
        panel.background = element_blank())

rawCountsPlot

# Detrended against search effort -----------------------------------------

# Per year (before 2019): distinct `sp` values and the number of rows of
# temporalDataOnline, the latter standing in for search effort.
trendData <- temporalDataOnline %>%
  filter(year < 2019) %>%
  group_by(year) %>%
  summarise(nSpecies = n_distinct(sp),
            nPages = n())

# Species count regressed on row count; the residuals are the species counts
# with the linear effect of effort removed.
linReg <- lm(trendData$nSpecies ~ trendData$nPages)

# Inspect the fit: residual degrees of freedom and coefficients.
df.residual(linReg)
coef(linReg)

trendData$residuals <- residuals(linReg)

# Lollipop chart of the regression residuals per year; points and stems are
# coloured by whether the residual is negative.
trendPlot <- ggplot(trendData, aes(x = year, colour = residuals < 0)) +
  geom_point(aes(y = residuals), size = 3) +
  geom_segment(aes(xend = year, y = 0, yend = residuals), size = 1.2) +
  scale_colour_scico_d(palette = "roma") +
  scale_x_continuous(limits = c(2000, 2019),
                     breaks = seq(2000, 2018, 1),
                     minor_breaks = NULL) +
  labs(x = "Year", y = "Residuals") +
  theme_bw() +
  theme(legend.position = "none",
        axis.line = element_line(),
        axis.title = element_text(face = 2),
        axis.title.y = element_text(angle = 0, hjust = 1),
        panel.border = element_blank(),
        panel.background = element_blank())

# Lollipop chart of the number of species found online per year.
onlineSpPlot <- ggplot(trendData, aes(x = year)) +
  geom_point(aes(y = nSpecies), size = 3) +
  geom_segment(aes(xend = year, y = 0, yend = nSpecies), size = 1.2) +
  # no layer maps colour, so this scale has no visible effect here
  scale_colour_scico_d(palette = "roma") +
  scale_x_continuous(limits = c(2000, 2019),
                     breaks = seq(2000, 2018, 1),
                     minor_breaks = NULL) +
  labs(x = "Year", y = "# of\nspecies") +
  theme_bw() +
  theme(legend.position = "none",
        axis.line = element_line(),
        axis.title = element_text(face = 2),
        axis.title.y = element_text(angle = 0, hjust = 1),
        panel.border = element_blank(),
        panel.background = element_blank())

# Lollipop chart of nPages per year, with two-digit year labels on the x axis.
onlinePagePlot <- ggplot(trendData, aes(x = year)) +
  geom_point(aes(y = nPages), size = 3) +
  geom_segment(aes(xend = year, y = 0, yend = nPages), size = 1.2) +
  # no layer maps colour, so this scale has no visible effect here
  scale_colour_scico_d(palette = "roma") +
  # the first two characters of each year are replaced by an apostrophe
  scale_x_continuous(limits = c(2000, 2019),
                     breaks = seq(2000, 2020, 1),
                     labels = sub("^..", "'", seq(2000, 2020, 1)),
                     minor_breaks = NULL,
                     expand = expansion(c(0,0), c(1,1))) +
  coord_cartesian(clip = "off") +
  labs(x = "Year", y = "# of\npages") +
  theme_bw() +
  theme(legend.position = "none",
        axis.line = element_line(),
        axis.title = element_text(face = 2),
        axis.title.y = element_text(angle = 0, hjust = 1),
        panel.border = element_blank(),
        panel.background = element_blank())

# trendPanel <- ggarrange(trendPlot +
#             rremove("x.title") +
#             rremove("x.text")
#           ,
#           onlineSpPlot +
#             rremove("x.title") +
#             rremove("x.text")
#           ,
#           onlinePagePlot,
#           ncol = 1, align = "v",
#           heights = c(3,1,1.3))
# 
# trendPanel

# Species unique to years -------------------------------------------------

## ONLINE TRADE SPP per year
# One row per (year, species) seen online. slice(1) keeps the first row of each
# group; the row index is passed by position because slice() does not accept
# named arguments in its dots in recent dplyr releases.
onlineTempSpp <- temporalDataOnline %>% 
  group_by(year, sp) %>% 
  slice(1) %>% 
  select("amphiName" = sp, year) %>% 
  mutate(source = "Online") %>% 
  ungroup()

## LEMIS SPP per year 
# NOTE(review): records with an unmatched (NA) amphiName are kept here, so NA
# travels through as if it were a species name - confirm this is intended.
lemisTempSpp <- lemisData %>% 
  group_by(shipment_year, amphiName) %>% 
  slice(1) %>% 
  select("year" = shipment_year, amphiName) %>% 
  mutate(source = "LEMIS") %>% 
  ungroup()

## CITES SPP per year
# Distinct non-missing names in each of columns 26 to 45 of citesData.
# NOTE(review): the positions are hard-coded - presumably the X2000 to X2019
# columns; verify against the CITES export.
# lapply() always returns a list named by column, whereas apply() collapses to
# a matrix or vector whenever every column yields the same number of names.
citesTempSppList <- lapply(citesData[, 26:45], function(x){
  unique(x[!is.na(x)])
})

# Long table of (year, amphiName); the year is the column name without its
# "X". Lengths are spelled out so a column with no names gives a zero-row
# piece instead of a recycling error.
citesTempSpp <- do.call(rbind, lapply(names(citesTempSppList), function(x){
  spp <- citesTempSppList[[x]]
  data.frame(year = rep(sub("X", "", x), length(spp)),
             amphiName = spp,
             source = rep("CITES", length(spp)))
}))

tempSppDF <- rbind(onlineTempSpp, lemisTempSpp, citesTempSpp)

# Keep each species once per year regardless of which source reported it; the
# `source` value on the surviving row is whichever came first and carries no
# meaning from here on.
tempSppDF <- tempSppDF %>% 
  group_by(year, amphiName) %>% 
  slice(1) %>% 
  ungroup()

# For each year, count the species recorded in that year and in no other year.
uniTempSpp <- lapply(unique(tempSppDF$year), function(yr){
  inYear <- tempSppDF$year == yr
  sppThisYear <- unique(tempSppDF$amphiName[inYear])
  sppOtherYears <- unique(tempSppDF$amphiName[!inYear])
  data.frame("year" = yr,
             "nUniSpp" = sum(!sppThisYear %in% sppOtherYears))
})
uniTempSpp <- do.call(rbind, uniTempSpp)
# year may arrive as text, so convert it to a number for plotting
uniTempSpp$year <- as.numeric(as.character(uniTempSpp$year))

# Lollipop chart of the number of species recorded in a single year only.
# Stems are drawn first so the points sit on top of them.
uniSppPlot <- ggplot(uniTempSpp, aes(x = year)) +
  geom_segment(aes(xend = year, y = 0, yend = nUniSpp), size = 1.2) +
  geom_point(aes(y = nUniSpp), size = 3) +
  # the first two characters of each year are replaced by an apostrophe
  scale_x_continuous(breaks = seq(2000, 2020, 1),
                     labels = sub("^..", "'", seq(2000, 2020, 1)),
                     minor_breaks = NULL,
                     expand = expansion(c(0,0), c(1,1))) +
  scale_y_continuous(limits = c(0,60),
                     breaks = seq(0, 60, 20)) +
  # no layer maps colour, so this scale has no visible effect here
  scale_colour_scico_d(palette = "roma") +
  labs(x = "Year",
       y = "# of unique\nspecies traded",
       colour = "") +
  theme_bw() +
  theme(legend.position = "none",
        axis.line = element_line(),
        axis.title = element_text(face = 2),
        axis.title.y = element_text(angle = 0, hjust = 1),
        panel.border = element_blank(),
        panel.background = element_blank())

# Stack the five panels in one column with vertically aligned axes. Year axis
# text and title are removed from every panel that does not carry its own
# x axis in the final figure. The result is stored so the files below are
# written from this exact object.
temporalPanel <- ggarrange(rawCountsPlot  +
            theme(plot.margin = margin(5,0,0,0)) +
            rremove("x.title") +
            rremove("x.text"),
          uniSppPlot +
            theme(plot.margin = margin(2,0,0,0)),
          trendPlot +
            rremove("x.title") +
            rremove("x.text"),
          onlineSpPlot +
            rremove("x.title") +
            rremove("x.text"),
          onlinePagePlot,
          ncol = 1,
          align = "v",
          heights = c(2,2,1,1,1),
          labels = c("A", "", "B", "", ""))

# display the panel, as before
temporalPanel

# Pass the panel explicitly: without `plot =`, ggsave() writes last_plot(),
# which depends on what was most recently created or displayed and is not
# guaranteed to be this panel (e.g. when the script is run without printing).
ggsave("./Figures/Temporal Plot.png", plot = temporalPanel,
       width = 160, height = 220, units = "mm")
ggsave("./Figures/Temporal Plot.pdf", plot = temporalPanel,
       width = 160, height = 220, units = "mm")
